{
  "cells": [
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "%matplotlib inline"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "\n# Train error vs Test error\n\n\nIllustration of how the performance of an estimator on unseen data (test data)\nis not the same as the performance on training data. As the regularization\nincreases the performance on train decreases while the performance on test\nis optimal within a range of values of the regularization parameter.\nThe example with an Elastic-Net regression model and the performance is\nmeasured using the explained variance a.k.a. R^2.\n\n\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "print(__doc__)\n\n# Author: Alexandre Gramfort <alexandre.gramfort@inria.fr>\n# License: BSD 3 clause\n\nimport numpy as np\nfrom sklearn import linear_model\n\n# #############################################################################\n# Generate sample data\nn_samples_train, n_samples_test, n_features = 75, 150, 500\nnp.random.seed(0)\ncoef = np.random.randn(n_features)\ncoef[50:] = 0.0  # only the top 10 features are impacting the model\nX = np.random.randn(n_samples_train + n_samples_test, n_features)\ny = np.dot(X, coef)\n\n# Split train and test data\nX_train, X_test = X[:n_samples_train], X[n_samples_train:]\ny_train, y_test = y[:n_samples_train], y[n_samples_train:]\n\n# #############################################################################\n# Compute train and test errors\nalphas = np.logspace(-5, 1, 60)\nenet = linear_model.ElasticNet(l1_ratio=0.7, max_iter=10000)\ntrain_errors = list()\ntest_errors = list()\nfor alpha in alphas:\n    enet.set_params(alpha=alpha)\n    enet.fit(X_train, y_train)\n    train_errors.append(enet.score(X_train, y_train))\n    test_errors.append(enet.score(X_test, y_test))\n\ni_alpha_optim = np.argmax(test_errors)\nalpha_optim = alphas[i_alpha_optim]\nprint(\"Optimal regularization parameter : %s\" % alpha_optim)\n\n# Estimate the coef_ on full data with optimal regularization parameter\nenet.set_params(alpha=alpha_optim)\ncoef_ = enet.fit(X, y).coef_\n\n# #############################################################################\n# Plot results functions\n\nimport matplotlib.pyplot as plt\nplt.subplot(2, 1, 1)\nplt.semilogx(alphas, train_errors, label='Train')\nplt.semilogx(alphas, test_errors, label='Test')\nplt.vlines(alpha_optim, plt.ylim()[0], np.max(test_errors), color='k',\n           linewidth=3, label='Optimum on test')\nplt.legend(loc='lower left')\nplt.ylim([0, 1.2])\nplt.xlabel('Regularization parameter')\nplt.ylabel('Performance')\n\n# Show estimated coef_ vs true coef\nplt.subplot(2, 1, 2)\nplt.plot(coef, label='True coef')\nplt.plot(coef_, label='Estimated coef')\nplt.legend()\nplt.subplots_adjust(0.09, 0.04, 0.94, 0.94, 0.26, 0.26)\nplt.show()"
      ]
    }
  ],
  "metadata": {
    "kernelspec": {
      "display_name": "Python 3",
      "language": "python",
      "name": "python3"
    },
    "language_info": {
      "codemirror_mode": {
        "name": "ipython",
        "version": 3
      },
      "file_extension": ".py",
      "mimetype": "text/x-python",
      "name": "python",
      "nbconvert_exporter": "python",
      "pygments_lexer": "ipython3",
      "version": "3.6.9"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}